from bs4 import BeautifulSoup
import requests
from requests_html import HTMLSession

# Keywords submitted to the tccb.gov.tr and ticaret.gov.tr search pages.
SEARCH_LIST = [
    "IMF",
    "Uluslararası Para Fonu",
    "Dış Güçler",
    "Avrupa Birliği",
    "Faiz Lobisi",
    "Küresel",
    "Amerika",
    "ABD",
    "Paralel Devlet",
    "Gezi Parkı",
    "15 Temmuz",
    "Stokçu",
    "Kredi Derecelendirme",
    "Fitch",
    "Moody's",
    "Standard & Poor's",
]

# Module-level list kept for backward compatibility.  The scrapers below each
# build their own URL list so links from different sites are never mixed.
href_list = []

# Search-result identifiers used in iletisim.gov.tr result URLs.
# NOTE(review): presumably one identifier per SEARCH_LIST term, minus the two
# terms the inline comments report as having no results -- verify.
SEARCH_LIST_ILETISIM = [
    "691197451e77f3268b102d66c2985918",
    "ceb9ff3e536a7492aace27701e4a0825",
    "99713bce7639523b8117b6dc3436b52a",
    "6bfefa7c516f5e10eac677aba5ed5d8b",
    "82c867e95ad134d2c2f43afb6b46144d",
    "ad9f4ef405c72113ccc7002ae4ae66f2",
    "664c3a7bda292113fe51ed12123e1197",
    "9f537ba8fe1cb8ec7176872b3873df36",
    "6f1ff7ad359dfdf4303535e36a242db7",
    "668feba1724357ee975e70c4a3936d9d",
    "c842dff183e8113e95593184d1f1426e",
    "d41bab1b5df760c42bd954655cf73083",
    "42b27e37ee75b530577ec306bb031c24",  # No results for Fitch
    "dbf6904a7f463872e1aec71f17d61e3f",  # No results for Standard & Poor's
]

# Seconds to wait for any single HTTP response before giving up, so one
# stalled connection cannot hang the whole crawl.
REQUEST_TIMEOUT_SECONDS = 30

# Blank lines written after every article in the output files.
ARTICLE_SEPARATOR = "\n\n\n\n\n\n"


def _get_soup(url, params=None):
    """Download *url* and return its body parsed with the lxml parser.

    *params*, when given, is passed to ``requests.get`` so that requests
    builds and percent-encodes the query string.
    """
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
    return BeautifulSoup(response.text, "lxml")


def _collect_article_urls(terms, fetch_page, find_entries, entry_url):
    """Return the unique article URLs found across all result pages of all terms.

    For each term, pages are requested with ``fetch_page(term, page_index)``
    (``page_index`` starts at 0) until ``find_entries(soup)`` returns nothing.
    ``entry_url(entry)`` maps one result entry to a URL, or to ``None`` when
    the entry should be skipped.  URLs are returned in discovery order.

    NOTE(review): as in the original script there is no upper bound on the
    number of pages; a site that never returns an empty page would loop
    forever.
    """
    urls = []
    seen = set()  # O(1) duplicate check; the list alone keeps the order
    for term in terms:
        page_index = 0
        while True:
            entries = find_entries(fetch_page(term, page_index))
            if not entries:
                break
            for entry in entries:
                url = entry_url(entry)
                if url is not None and url not in seen:
                    seen.add(url)
                    urls.append(url)
            page_index += 1
    return urls


def _strong_or_full_text(paragraph):
    """Return the text of the paragraph's first ``<strong>`` tag, else the whole paragraph text.

    NOTE(review): this mirrors the original script, which means a paragraph
    that contains a ``<strong>`` tag contributes only that tag's text.
    """
    strong = paragraph.strong
    return strong.text if strong is not None else paragraph.text


def _write_articles(urls, output_path, extract_lines):
    """Fetch every URL and write the lines ``extract_lines(soup)`` yields to *output_path*.

    The file is written as UTF-8 so non-ASCII text does not depend on the
    platform's default encoding.  Pages that cannot be downloaded, or whose
    layout does not match the expected elements, are skipped and their URL is
    printed, exactly as the original script reported failures.
    """
    with open(output_path, "w", encoding="utf-8") as out_file:
        for url in urls:
            try:
                lines = extract_lines(_get_soup(url))
            except (requests.RequestException, AttributeError):
                # AttributeError: an expected element was missing (find()
                # returned None) -- best effort, report and move on.
                print(url)
                continue
            out_file.write("\n".join(lines) + "\n" + ARTICLE_SEPARATOR)


def scrape_tccb(search_terms=None, output_path="tccb2.txt"):
    """Scrape tccb.gov.tr search results and write the matching articles to a text file.

    Args:
        search_terms: Keywords to search for; defaults to ``SEARCH_LIST``.
        output_path: File that receives title, date line and paragraphs of
            every article found.

    Returns:
        The list of unique article URLs that were collected.

    Raises:
        requests.RequestException: If a search-result page cannot be fetched.
    """
    terms = SEARCH_LIST if search_terms is None else search_terms

    def fetch_page(term, page_index):
        # The query goes through ``params`` so spaces, non-ASCII letters and
        # the "&" in "Standard & Poor's" are encoded instead of corrupting
        # the query string.
        return _get_soup(
            "https://www.tccb.gov.tr/Search",
            params={
                "s": term,
                "p0": term,
                "p1": "-410--365--356--353--1709--1695--355--354-",
                "sT": "1",
                "p2": "28.08.2014",
                "p3": "08.09.2021",
                "presidentId": "12",
                "page": page_index + 1,  # the site's pages are numbered from 1
            },
        )

    def find_entries(soup):
        container = soup.find("div", id="list-press-address")
        # A page without the result container means there is nothing more to
        # read; the original raised AttributeError here.
        return container.find_all("dl") if container is not None else []

    def entry_url(entry):
        return "https://tccb.gov.tr" + entry.dd.a["href"]

    def extract_lines(article):
        detail = article.find("div", id="news-detail")
        lines = [detail.h1.span.text, detail.h6.text]
        body = article.find("div", id="news-detail-body")
        lines.extend(_strong_or_full_text(p) for p in body.find_all("p"))
        return lines

    urls = _collect_article_urls(terms, fetch_page, find_entries, entry_url)
    _write_articles(urls, output_path, extract_lines)
    return urls


def scrape_iletisim(search_ids=None, output_path="iletisim2.txt"):
    """Scrape iletisim.gov.tr search results and write the matching articles to a text file.

    Args:
        search_ids: Search-result identifiers; defaults to
            ``SEARCH_LIST_ILETISIM``.
        output_path: File that receives title, date line and paragraphs of
            every article found.

    Returns:
        The list of unique article URLs that were collected.

    Raises:
        requests.RequestException: If a search-result page cannot be fetched.
    """
    ids = SEARCH_LIST_ILETISIM if search_ids is None else search_ids

    def fetch_page(search_id, page_index):
        url = "https://www.iletisim.gov.tr/turkce/arama_sonuclari/" + search_id
        if page_index:
            # Later pages are addressed by an offset that grows by 9 per page
            # (presumably the page size -- verify against the site).
            url += "/P" + str(page_index * 9)
        return _get_soup(url)

    def find_entries(soup):
        return soup.find_all("div", class_="entry__header")

    def entry_url(entry):
        return entry.h2.a["href"]

    def extract_lines(article):
        lines = [
            article.find("h2", class_="page-title text-center mt-0 mb-1").text,
            article.find("li", class_="entry__meta-date").text,
        ]
        body = article.find("div", id="content_original_tr_TR", class_="agenda_content")
        lines.extend(_strong_or_full_text(p) for p in body.find_all("p"))
        return lines

    urls = _collect_article_urls(ids, fetch_page, find_entries, entry_url)
    _write_articles(urls, output_path, extract_lines)
    return urls


def scrape_ticaret(search_terms=None, output_path="ticaret2.txt"):
    """Scrape ticaret.gov.tr search results and write the matching pages to a text file.

    Result links ending in ``.pdf``, ``.zip`` or ``.htm`` (case-insensitive)
    are ignored; relative links are resolved against ``https://ticaret.gov.tr``.

    Args:
        search_terms: Keywords to search for; defaults to ``SEARCH_LIST``.
        output_path: File that receives the header fields and body text of
            every page found.

    Returns:
        The list of unique page URLs that were collected.

    Raises:
        requests.RequestException: If a search-result page cannot be fetched.
    """
    terms = SEARCH_LIST if search_terms is None else search_terms
    skipped_suffixes = (".pdf", ".zip", ".htm")

    def fetch_page(term, page_index):
        # ``params`` takes care of encoding the search term (see scrape_tccb).
        return _get_soup(
            "https://ticaret.gov.tr/Web/Ara",
            params={"page": page_index + 1, "text": term},
        )

    def find_entries(soup):
        return soup.find_all(
            "a",
            class_="list-group-item list-group-item-action flex-column align-items-start",
        )

    def entry_url(anchor):
        link = anchor.get("href")
        if link is None or link.lower().endswith(skipped_suffixes):
            return None
        if link.startswith("https://"):
            return link
        return "https://ticaret.gov.tr" + link

    def extract_lines(article):
        header = article.find("div", class_="__header")
        if header is None:
            # find() returns None instead of raising, so the fallback has to
            # be chosen explicitly; the original try/except never reached it.
            header = article.find("div", class_="__header with-image")
        content = article.find("div", class_="__content")
        return [header.h2.text, header.p.text, header.span.text, content.get_text()]

    urls = _collect_article_urls(terms, fetch_page, find_entries, entry_url)
    _write_articles(urls, output_path, extract_lines)
    return urls


def main(argv=None):
    """Run the scrapers named in *argv* (``tccb``, ``iletisim``, ``ticaret``).

    *argv* defaults to the command-line arguments.  When no scraper is named
    nothing is run, which keeps a plain ``python <script>`` as inert as it was
    while the scrapers were disabled.

    Raises:
        SystemExit: If an unknown scraper name is given.
    """
    import sys

    names = sys.argv[1:] if argv is None else list(argv)
    scrapers = {
        "tccb": scrape_tccb,
        "iletisim": scrape_iletisim,
        "ticaret": scrape_ticaret,
    }
    unknown = [name for name in names if name not in scrapers]
    if unknown:
        raise SystemExit("unknown scraper(s): " + ", ".join(unknown))
    for name in names:
        scrapers[name]()


if __name__ == "__main__":
    main()
